# ------------------------------------------------------------------------------
# Reports various outcome stats cited in paper
# Author: Cassidy Shubatt <cshubatt@gmail.com>
# To run: bash 08_report_outcome_stats.sh
# ------------------------------------------------------------------------------

# Libraries --------------------------------------------------------------------
library(here)
library(yaml)
library(data.table)
library(tidyverse)
library(glue)
library(testit) # other()
library(broom)
library(lfe) # felm()

u <- modules::use(here::here("lib", "util.R"))
overnight_lab <- ""
temp <- here::here(
  "code", "03_score_diagnostics", "temp",
  "08_report_outcome_stats"
)
if (!dir.exists(temp)) {
  dir.create(temp)
}

# Load Data --------------------------------------------------------------------
message("Loading data...")
paths <- read_yaml(here::here("lib", "filepaths.yml"))
subsample_flags <- readRDS(paths$analysis$subsample_flags)
test_cohort <- readRDS(glue(paths$analysis$test_cohort)) %>%
  filter(!exclude) %>%
  u$safe_left_join(subsample_flags)

full_cohort <- readRDS(glue(paths$analysis$full_cohort)) %>%
  filter(!exclude)

num_tested <- sum(test_cohort$test_010_day)
untested <- filter(test_cohort, !test_010_day)
tested <- filter(test_cohort, test_010_day)

message("Tested set is ", nrow(tested)/nrow(untested), "times the size of the untested set.")
message("")

# Test rates -------------------------------------------------------------------
message("Reporting test rates...")
test_rates <- test_cohort %>%
  group_by(tile_stent_or_cabg_010_tested) %>%
  summarize(n = n(), test_rate = mean(test_010_day)) %>%
  ungroup
print(test_rates, n = 10)

message("Reporting num caths, stress tests in full cohort...")

cath <- full_cohort$cath_010_day
stress <- full_cohort$stress_010_day

message("Num caths: ", sum(cath))
message("Num stress: ", sum(stress))
message("Num stress & cath: ", sum(stress & cath))
message("")

# Yield rates -------------------------------------------------------------------
message("Reporting yield rates...")
message(
  "Overall yield rate (holdout): ", mean(tested$stent_or_cabg_010_day)
)
message(
  "Overall mean predicted risk (tested holdout): ",
  mean(tested$p__ensemble__stent_or_cabg_010_day__tested)
)
message(
  "Overall mean predicted risk (untested holdout): ",
  mean(untested$p__ensemble__stent_or_cabg_010_day__tested)
)
pred_yields <- sum(untested$p__ensemble__stent_or_cabg_010_day__tested)
pct_untested_yields <- pred_yields/(pred_yields + sum(tested$stent_or_cabg_010_day))
message("Naive exp yields in untested: ", pred_yields)
message("Num actual yields in tested: ", sum(tested$stent_or_cabg_010_day))
message(
  "Pct of yields 'missed' according to naive risk prediction: ",
  pct_untested_yields
)

yield_rates <- tested %>%
  group_by(tile_stent_or_cabg_010_tested) %>%
  summarize(n = n(), yield_rate = mean(stent_or_cabg_010_day)) %>%
  ungroup
print(yield_rates, n = 10)
message("")

# Riskiest N patients ----------------------------------------------------------
message("")
riskiest_n <- test_cohort %>%
  mutate(risk_rank = rank(-p__ensemble__stent_or_cabg_010_day__tested)) %>%
  filter(risk_rank <= num_tested)
message(
  "Riskiest ", nrow(riskiest_n), "patients = ",
  100*nrow(riskiest_n)/nrow(test_cohort), "% of holdout"
)
message(
  "10d test rate in ", nrow(riskiest_n), " riskiest patients: ",
  mean(riskiest_n$test_010_day)
)
message(
  "Mean predicted risk in ", nrow(riskiest_n), " riskiest patients: ",
  mean(riskiest_n$p__ensemble__stent_or_cabg_010_day__tested)
)
message("")

# Untested stats ---------------------------------------------------------------
message("Adverse outcome stats in unrestricted, untested sample")
message("Num untested: ", nrow(untested))

message("Overall rates: ")
message("MACE & Pos Tn: ", mean(untested$macetrop_030_pos))
message("Death: ", mean(untested$death_030_day))
message(
  "MACE & Pos Tn | Death: ",
  mean(untested$death_030_day | untested$macetrop_030_pos)
)
message("")

# Same-day AMI -----------------------------------------------------------------
cohort_unt_ami <- readRDS(glue(paths$analysis$test_cohort)) %>%
  filter(!excl_flag_chronic & !excl_flag_c_int & !excl_flag_death) %>%
  filter(age_at_admit < 80) %>%
  filter(!test_010_day & ami_day_of)

untested_ami <- nrow(cohort_unt_ami)
untested_ami_rate <- (untested_ami/nrow(untested)) %>% round(digits = 3)
times_tested_ami <- (untested_ami/nrow(tested)) %>% round(digits = 3)
message(
  "Rate untested with sameday AMI: ", untested_ami_rate, "(", times_tested_ami,
  "times size of tested set)"
)

# Same-day Troponin ------------------------------------------------------------
cohort_unt_tn <- readRDS(glue(paths$analysis$test_cohort)) %>%
  filter(!excl_flag_chronic & !excl_flag_c_int & !excl_flag_death) %>%
  filter(age_at_admit < 80) %>%
  filter(!test_010_day & maxtrop_sameday > 0)

untested_tn <- nrow(cohort_unt_tn)
untested_tn_rate <- (untested_tn/nrow(untested)) %>% round(digits = 3)
times_tested_tn <- (untested_tn/nrow(tested)) %>% round(digits = 3)
message(
  "Rate untested with sameday positive troponin: ", untested_tn_rate,
  "(", times_tested_tn, "times size of tested set)"
)

# MACE | Death in top, bottom quartiles ----------------------------------------
message("")
top_quartile <- filter(untested, tile_stent_or_cabg_004_tested == 4)
bottom_quartile <- filter(untested, tile_stent_or_cabg_004_tested == 1)

fit <- felm(macetrop_pos_or_death_030 ~ 1 | 0 | 0 | ptid, top_quartile) %>% tidy()
rate <- fit$estimate[1] %>% round(digits = 4)
se <- fit$std.error[1] %>% round(digits = 4)
message(
  "MACE & Pos Tn | Death in top quartile (tested): ",
  rate, "(SE = ", se, ", n = ", nrow(top_quartile), ")"
)

fit <- felm(macetrop_pos_or_death_030 ~ 1 | 0 | 0 | ptid, bottom_quartile) %>% tidy()
rate <- fit$estimate[1] %>% round(digits = 4)
se <- fit$std.error[1] %>% round(digits = 4)
message(
  "MACE & Pos Tn | Death in bottom quartile (tested): ",
  rate, "(SE = ", se, ", n = ", nrow(bottom_quartile), ")"
)

# Decile adverse rates ---------------------------------------------------------
message("")
message("Outcome rates by decile-tested:")
dec_rates <- untested %>%
  group_by(tile_stent_or_cabg_010_tested) %>%
  summarize(
    n = n(),
    pct_of_unt = n() / nrow(untested),
    macetrop_pos_rate = mean(macetrop_030_pos),
    death_rate = mean(death_030_day),
    macetrop_or_death_rate = mean(macetrop_pos_or_death_030)
  ) %>%
  setnames("tile_stent_or_cabg_010_tested", "tile") %>%
  ungroup()

print(dec_rates, n = 10)

message("")
message("Outcome rates by quartile-tested:")
quart_rates <- untested %>%
  group_by(tile_stent_or_cabg_004_tested) %>%
  summarize(
    n = n(),
    pct_of_unt = n() / nrow(untested),
    macetrop_pos_rate = mean(macetrop_030_pos),
    death_rate = mean(death_030_day),
    macetrop_or_death_rate = mean(macetrop_pos_or_death_030)
  ) %>%
  setnames("tile_stent_or_cabg_004_tested", "tile") %>%
  ungroup()

print(quart_rates, n = 4)

message("")
message("Outcome rates by decile-untested:")
dec_rates <- untested %>%
  group_by(tile_stent_or_cabg_010_untested) %>%
  summarize(
    macetrop_pos_rate = mean(macetrop_030_pos),
    death_rate = mean(death_030_day),
    macetrop_or_death_rate = mean(macetrop_030_pos | death_030_day)
  ) %>%
  ungroup()

print(dec_rates, n = 10)

# Top 4 Bins -------------------------------------------------------------------
message("")
message("Top 4 bins (tested):")
top_n <- filter(test_cohort, tile_stent_or_cabg_010_tested > 6)
message("Total pts in top 4 bins: ", nrow(top_n))
message("Num tested in top 4 bins: ", sum(top_n$test_010_day))
message("Num untested in top 4 bins: ", sum(!top_n$test_010_day))
message("Pct of untested in top 4 bins: ", sum(!top_n$test_010_day)/nrow(untested))

# Bottom half of untested ------------------------------------------------------
message("")
bottom_half <- untested %>%
  filter(tile_stent_or_cabg_010_untested <= 5)
message(
  "MACE & Pos Tn | Death in untested w risk in bottom half of distn: ",
  mean(bottom_half$macetrop_pos_or_death_030)
)

# Untested of same size as tested ----------------------------------------------
message("")
riskiest_untested <- untested %>%
  mutate(risk_rank = rank(-p__ensemble__stent_or_cabg_010_day__tested)) %>%
  filter(risk_rank <= num_tested)
message("30d adverse outcome rates in ", nrow(riskiest_untested), " riskiest untested patients:")
message("MACE & Pos Tn: ", mean(riskiest_untested$macetrop_030_pos))
message("Death: ", mean(riskiest_untested$death_030_day))
message(
  "MACE & Pos Tn | Death: ",
  mean(riskiest_untested$macetrop_pos_or_death_030)
)

# Naive risk > 0.02 ------------------------------------------------------------
message("")
above_02 <- untested %>%
  filter(p__ensemble__stent_or_cabg_010_day__tested > 0.02)
message("Pct of untested w/naive risk > 0.02: ", nrow(above_02) / nrow(untested))
message(
  "MACE & Pos Tn | Death in untested w risk > 0.02: ",
  mean(above_02$macetrop_pos_or_death_030)
)

# Pct high/low risk patients with/without ECG, troponin ------------------------
message("")
pct_ecg <- mean(top_quartile$has_ecg)
pct_no_tn <- mean(is.na(top_quartile$maxtrop_sameday))
pct_ecg_lo <- mean(bottom_quartile$has_ecg)
pct_no_tn_lo <- mean(is.na(bottom_quartile$maxtrop_sameday))
message("Pct of high-risk (Q4) untested w/o ECG: ", 1 - pct_ecg)
message("Pct of high-risk (Q4) untested w/o troponin: ", pct_no_tn)
message("Pct of low-risk (Q1) untested w/o ECG: ", 1 - pct_ecg_lo)
message("Pct of low-risk (Q1) untested w/o troponin: ", pct_no_tn_lo)

# Exclusions -------------------------------------------------------------------
message("")
message("Getting adverse outcome stats in restricted untested samples...")
for (restr in c("noecg", "not_sameday_tn", "not_admit_sym")) {
  message("")
  message("Results excluding ", restr, ":")
  pct_excl <- mean(!untested[[restr]]) %>% round(digits=3)
  message("Pct of untested excluded with ",restr, " restriction", pct_excl)
  restr_df <- untested[untested[[restr]], ]

  top_quartile <- filter(restr_df, tile_stent_or_cabg_004_tested == 4)

  fit <- felm(macetrop_pos_or_death_030 ~ 1 | 0 | 0 | ptid, top_quartile) %>% tidy()
  rate <- fit$estimate[1] %>% round(digits = 4)
  se <- fit$std.error[1] %>% round(digits = 4)
  message(
    "MACE & Pos Tn | Death in top quartile (tested): ",
    rate, "(SE = ", se, ", n = ", nrow(top_quartile), ")"
  )

  message("")
  message("Deciles-untested")
  dec_rates <- restr_df %>%
    group_by(tile_stent_or_cabg_010_untested) %>%
    summarize(
      macetrop_pos_rate = mean(macetrop_030_pos),
      death_rate = mean(death_030_day),
      macetrop_or_death_rate = mean(macetrop_pos_or_death_030)
    ) %>%
    ungroup()

  print(dec_rates, n = 10)
  message("")

  quart_rates <- restr_df %>%
    group_by(tile_stent_or_cabg_004_tested) %>%
    summarize(
      macetrop_pos_rate = mean(macetrop_030_pos),
      death_rate = mean(death_030_day),
      macetrop_or_death_rate = mean(macetrop_pos_or_death_030)
    ) %>%
    ungroup()

  quint_rates <- restr_df %>%
    group_by(tile_stent_or_cabg_005_tested) %>%
    summarize(
      macetrop_pos_rate = mean(macetrop_030_pos),
      death_rate = mean(death_030_day),
      macetrop_or_death_rate = mean(macetrop_pos_or_death_030)
    ) %>%
    ungroup()

  if(restr == "not_admit_sym"){
    message("Quintiles-tested")
    print(quint_rates, n = 5)
  }else{
    message("Quartiles-tested")
    print(quart_rates, n = 4)
  }
}

# Estimate undertesting --------------------------------------------------------
message("Undertesting according to 2% miss rate...")
adverse_realized <- u$get_mean_outcomes(
  population = "untested", outcome = "macetrop_pos_or_death_030",
  x_var = "tile_stent_or_cabg_100_untested", df = untested
)
adverse_predicted <- u$get_mean_outcomes(
  population = "untested",
  outcome = "p__ensemble__stent_or_cabg_010_day__tested",
  x_var = "tile_stent_or_cabg_100_untested", df = untested
)

message(
  "Rate of undertesting according to realized outcomes: ",
  sum(adverse_realized$beta_lo > 0.02), "%"
)
message(
  "Rate of undertesting (0.02 threshold) according to predicted risk: ",
  sum(adverse_predicted$beta_lo > 0.02), "%"
)

message("")
message("Done.")
